rm(list = ls())
library(tidyverse)

3.2.2 Creating ggplot

# Mapping supplied on the geom layer.
ggplot(mpg) +
  geom_point(aes(x = displ, y = hwy))

# Same plot: the mapping is declared in ggplot() and inherited by the layer.
ggplot(mpg, aes(x = displ, y = hwy)) +
  geom_point()

3.2.3

template

######################################################################
# ggplot(data = <DATA>) + <GEOM_FUNCTION>(mapping = aes(<MAPPINGS>)) #
######################################################################
# put data in <DATA>
# put function (geom_point etc) in <GEOM_FUNCTION>
# put variables to plot in <MAPPINGS>

3.2.4 Exercises

scatter plot of class vs drv

# Scatter plot of drv against class.
ggplot(mpg) +
  geom_point(aes(x = class, y = drv))

3.3

# Point colour is mapped to `class`.
ggplot(mpg) +
  geom_point(aes(x = displ, y = hwy, color = class))

* change the color of the points

# Point size is mapped to `class`; the recorded output shows the warning.
ggplot(mpg) +
  geom_point(aes(x = displ, y = hwy, size = class))
## Warning: Using size for a discrete variable is not advised.

### you will get a warning here * size = will change the size of the points * using size = for discrete data is not a good idea

# Point transparency (alpha) is mapped to `class`.
ggplot(mpg) +
  geom_point(aes(x = displ, y = hwy, alpha = class))

* changes the transparency of the points

# Point shape is mapped to `class`; the recorded warnings are kept below.
ggplot(mpg) +
  geom_point(aes(x = displ, y = hwy, shape = class))
## Warning: The shape palette can deal with a maximum of 6 discrete values
## because more than 6 becomes difficult to discriminate; you have 7.
## Consider specifying shapes manually if you must have them.
## Warning: Removed 62 rows containing missing values (geom_point).

* changes the shape of the points * additional groups will go unplotted when you use the shape aesthetic and * the number of classes exceeds the default number of shapes (6)

# A fixed colour is set as a layer argument, outside aes().
ggplot(mpg) +
  geom_point(aes(x = displ, y = hwy), colour = "blue")

* changes the color of all points * note that color = is outside aes( )

3.3.1 Exercises

3

# Point colour is mapped to `cyl`.
ggplot(mpg) +
  geom_point(aes(x = displ, y = hwy, color = cyl))

* you can use color or size, but not shape for continuous variables

5

# `stroke` is a fixed value here, so it is set as a layer argument rather than
# inside aes(); aes() is for mapping variables in the data to aesthetics.
ggplot(data = mpg) +
  geom_point(mapping = aes(x = displ, y = hwy), stroke = 6)

* Stroke changes the width of the border for shapes that have one (21-24).

6

# Point colour is mapped to the logical expression `displ < 5`.
ggplot(mpg) +
  geom_point(aes(x = displ, y = hwy, color = displ < 5))

  • In this case, it is logical variable which is TRUE or FALSE
  • This also explains exercise 1, color = “blue” created a categorical variable
  • that only had one category: “blue”.

3.5: split your plot into facets, subplots that each display one subset of the dataset

# One panel per `class`, wrapped into two rows.
ggplot(mpg) +
  geom_point(aes(x = displ, y = hwy)) +
  facet_wrap(~ class, nrow = 2)

* The variable that you pass to facet_wrap() should be discrete.

# One panel per `class`, wrapped into three rows.
ggplot(mpg) +
  geom_point(aes(x = displ, y = hwy)) +
  facet_wrap(~ class, nrow = 3)

# Grid of panels: `drv` across rows, `cyl` across columns.
ggplot(mpg) +
  geom_point(aes(x = displ, y = hwy)) +
  facet_grid(drv ~ cyl)

* facet your plot on the combination of the two variables * ex) the plot above shows the plot of x and y for each pair (drv, cyl)

# Facet by `drv` in rows only.
ggplot(mpg) +
  geom_point(aes(x = displ, y = hwy)) +
  facet_grid(drv ~ .)

* divide based on one variable by row = plot drv on the y-axis

# Facet by `drv` in columns only.
ggplot(mpg) +
  geom_point(aes(x = displ, y = hwy)) +
  facet_grid(. ~ drv)

* divide based on one variable by column = plot drv on the x-axis

3.6

# A single smoothed line through all observations.
ggplot(mpg) +
  geom_smooth(aes(x = displ, y = hwy))
## `geom_smooth()` using method = 'loess'

# One smoothed line per `drv` value, distinguished by linetype.
ggplot(mpg) +
  geom_smooth(aes(x = displ, y = hwy, linetype = drv))
## `geom_smooth()` using method = 'loess'

* separates the cars into three lines based on their drv value with different linetype each

# One smoothed line per `drv` value, grouped without changing appearance.
ggplot(mpg) +
  geom_smooth(aes(x = displ, y = hwy, group = drv))
## `geom_smooth()` using method = 'loess'

* separates the cars into three lines based on their drv value with same linetype each

# Global x/y mapping; only the point layer adds a colour mapping.
ggplot(mpg, aes(x = displ, y = hwy)) +
  geom_smooth() +
  geom_point(aes(color = class))
## `geom_smooth()` using method = 'loess'

* points are separated by color based on class

# The smooth layer gets its own data (subcompact cars only); points use mpg.
ggplot(mpg, aes(x = displ, y = hwy)) +
  geom_smooth(se = FALSE, data = filter(mpg, class == "subcompact")) +
  geom_point(aes(color = class))
## `geom_smooth()` using method = 'loess'

* the local data argument in geom_smooth() (= filter(mpg, class == "subcompact")) * overrides the global data argument in ggplot() for that layer only * the smooth line is drawn only for the data with class == “subcompact”

3.6.1 Exercises

# Colour by `drv` is declared globally, so it applies to points and lines.
ggplot(mpg, aes(x = displ, y = hwy, color = drv)) +
  geom_point() +
  geom_smooth(se = FALSE)
## `geom_smooth()` using method = 'loess'

* separate both the smooth line and points with different colors based on color = drv

3: show.legend = FALSE should be included in geom_point, geom_smooth, etc

  • you need `theme(legend.position = "none")`

6

  • top left
# Points plus a single blue smooth without a confidence band.
ggplot(mpg, aes(x = displ, y = hwy)) +
  geom_point() +
  geom_smooth(se = FALSE, colour = "blue")
## `geom_smooth()` using method = 'loess'

# Same plot with a red smooth.
ggplot(mpg, aes(x = displ, y = hwy)) +
  geom_point() +
  geom_smooth(se = FALSE, colour = "red")
## `geom_smooth()` using method = 'loess'

  • top right
# One blue smooth per `drv` group.
ggplot(mpg, aes(x = displ, y = hwy)) +
  geom_point() +
  geom_smooth(aes(group = drv), se = FALSE, colour = "blue")
## `geom_smooth()` using method = 'loess'

  • mid left
# Points and smooths are both coloured by `drv`.
ggplot(mpg, aes(x = displ, y = hwy, color = drv)) +
  geom_point() +
  geom_smooth(se = FALSE)
## `geom_smooth()` using method = 'loess'

  • mid right
# Points coloured by `drv`, with one overall smooth.
ggplot(mpg, aes(x = displ, y = hwy)) +
  geom_point(aes(color = drv)) +
  geom_smooth(se = FALSE)
## `geom_smooth()` using method = 'loess'

# incorrect version
ggplot(mpg, aes(x = displ, y = hwy, color = drv)) +
  geom_point() +
  geom_smooth(data = filter(mpg, drv == "r"), se = FALSE)
## `geom_smooth()` using method = 'loess'

  • bottom left
# Points coloured by `drv`; smooths distinguished by linetype per `drv`.
ggplot(mpg, aes(x = displ, y = hwy)) +
  geom_point(aes(color = drv)) +
  geom_smooth(aes(linetype = drv), se = FALSE)
## `geom_smooth()` using method = 'loess'

  • bottom right
# Shape 21 points: fill mapped to `drv`, white outline.
ggplot(mpg, aes(x = displ, y = hwy, fill = drv)) +
  geom_point(shape = 21, colour = "white")

# better?
ggplot(mpg, aes(x = displ, y = hwy, fill = drv)) +
  geom_point(shape = 21, size = 4, colour = "white")

3.7

# the two plots below are same
# stat_count() and geom_bar() draw the same plot.
ggplot(diamonds, aes(x = cut)) +
  stat_count()

ggplot(diamonds, aes(x = cut)) +
  geom_bar()

# Small table in which the bar heights are already present in column `b`.
demo <- tribble(
  ~a, ~b,
  "bar_1", 20,
  "bar_2", 30,
  "bar_3", 40
)

# ggplot(demo, aes(x = a, y = b)) + geom_bar() # this does not work
ggplot(demo, aes(x = a, y = b)) +
  geom_bar(stat = "identity")

# Summary plot (not a bar chart): for each cut, a point at the median depth
# with a line from the minimum to the maximum depth.
# `fun`, `fun.min` and `fun.max` replace `fun.y`, `fun.ymin` and `fun.ymax`,
# which are deprecated since ggplot2 3.3.0.
ggplot(data = diamonds) +
  stat_summary(
    mapping = aes(x = cut, y = depth),
    fun.min = min,
    fun.max = max,
    fun = median
  )

# The same kind of plot built from the geom side, summarising with the mean.
ggplot(data = diamonds) +
  geom_pointrange(
    mapping = aes(x = cut, y = depth),
    stat = "summary",
    fun.min = min,
    fun.max = max,
    fun = mean # you can use median instead of mean
  )

3.7.1

2

ggplot(diamonds, aes(x = cut, y = depth)) +
  geom_col()

# default stat of geom_col: "identity" (bar heights are the y values)
# default stat of geom_bar: "count" (bar heights are the number of rows)

4

# stat_smooth() used directly as the layer.
ggplot(mpg, aes(x = displ, y = hwy)) +
  stat_smooth()
## `geom_smooth()` using method = 'loess'

5

# after_stat(prop) replaces the `..prop..` notation, which is deprecated in
# current ggplot2 releases.

# this does not give what we want
ggplot(data = diamonds) +
  geom_bar(mapping = aes(x = cut, y = after_stat(prop)))

# this works
# if `group` is not set to 1 or whatever, all the bars have prop == 1
# group="whatever" is a "dummy" grouping to override the default behavior
# for detail, see here -> http://stackoverflow.com/questions/39878813/r-ggplot-geom-bar-meaning-of-aesgroup-1
ggplot(data = diamonds) +
  geom_bar(mapping = aes(x = cut, y = after_stat(prop), group = 1))

# this works too
ggplot(data = diamonds) +
  geom_bar(
    mapping = aes(x = cut, y = after_stat(prop), fill = color, group = color)
  )

3.8 Position adjustment

# Bar outline coloured by `cut`.
ggplot(diamonds) +
  geom_bar(aes(x = cut, color = cut))

# Bar interior filled by `cut`.
ggplot(diamonds) +
  geom_bar(aes(x = cut, fill = cut))

# same as without `group = `
ggplot(diamonds) +
  geom_bar(aes(x = cut, group = cut))

# position = "identity": bars for each clarity are drawn on top of each other.
ggplot(diamonds, aes(x = cut, color = clarity)) +
  geom_bar(position = "identity", fill = NA)

ggplot(diamonds, aes(x = cut, fill = clarity)) +
  geom_bar(position = "identity", alpha = .2)

# position = "fill"
ggplot(diamonds, aes(x = cut, fill = clarity)) +
  geom_bar(position = "fill")

* each set of stacked bars has the same height * hence it is easy to compare the proportions across groups (across different sets of cut) * 各barの高さは等しい、縦軸は割合なので、cut毎にclarityの割合を比べやすい

# position = "fill" with clarity mapped to the outline colour instead of fill.
ggplot(diamonds, aes(x = cut, color = clarity)) +
  geom_bar(position = "fill")

*見にくい

# position = "dodge"
ggplot(diamonds, aes(x = cut, fill = clarity)) +
  geom_bar(position = "dodge")

* places overlapping objects directly beside one another * This makes it easier to compare individual values. * identityを使うと重なってしまう棒を隣り合わせて並べていくので、被るところがない * 縦軸は割合ではなく、積み重ねたカウント

# position = "jitter" adds random noise to each point
ggplot(mpg) +
  geom_point(aes(x = displ, y = hwy), position = "jitter")

* deal with overplotting, 同じ値を重ねてしまう問題

3.8.1 Exercises

1

  • both cty and hwy are discrete and thus overplotting occurs # adding jitter does not necessarily make sense because this manipulation will change information
# cty vs hwy with jitter ...
ggplot(mpg) +
  geom_point(aes(x = cty, y = hwy), position = "jitter")

# ... and without.
ggplot(mpg) +
  geom_point(aes(x = cty, y = hwy))

2

  • width and height to control the amount of jitter
  • width controls the horizontal jitter and height controls the vertical jitter
# Default amount of jitter.
ggplot(mpg, aes(x = cty, y = hwy)) +
  geom_jitter()

# Explicit jitter amounts in both directions.
ggplot(mpg, aes(x = cty, y = hwy)) +
  geom_jitter(width = 20, height = 20)

* too much noise

3

  • geom_count will change the size of points based on the number of observations at each location # そのポイントに落ちるデータポイントの数に基づいてプロットのサイズが変わる
# Jittered points ...
ggplot(mpg, aes(x = cty, y = hwy)) +
  geom_jitter()

# ... versus geom_count().
ggplot(mpg, aes(x = cty, y = hwy)) +
  geom_count()

4

# Default position for boxplots.
ggplot(mpg, aes(x = drv, y = hwy, color = class)) +
  geom_boxplot()

# convey little information because of overlaps
ggplot(mpg, aes(x = drv, y = hwy, color = class)) +
  geom_boxplot(position = "identity")

# Fill instead of outline colour.
ggplot(mpg, aes(x = drv, y = hwy, fill = class)) +
  geom_boxplot()

# compare the figures above

3.9 Coordinate systems

# coord_flip() switches the x and y axes.
ggplot(mpg, aes(x = drv, y = hwy, fill = class)) +
  geom_boxplot() +
  coord_flip()

ggplot(mpg, aes(x = drv, y = hwy, color = class)) +
  geom_boxplot() +
  coord_flip()

# Bar charts of `drv` with `class` as outline colour, then as group only.
ggplot(mpg, aes(x = drv, color = class)) +
  geom_bar()

ggplot(mpg, aes(x = drv, group = class)) +
  geom_bar()

# Attaching maps masks purrr::map(), as the recorded output below shows.
library(maps)
##
## Attaching package: 'maps'
## The following object is masked from 'package:purrr':
##
##     map
nz <- map_data("nz")

# Draw the "nz" map data as outlined polygons.
ggplot(data = nz, mapping = aes(x = long, y = lat, group = group)) +
  geom_polygon(colour = "black", fill = "white")

# Bar chart of `cut` with full-width bars, no legend and no axis labels.
bar <- ggplot(data = diamonds) +
  geom_bar(
    mapping = aes(x = cut, fill = cut),
    show.legend = FALSE,
    width = 1
  ) +
  theme(aspect.ratio = 1) + # `aspect.ratio` sets the ratio of vertical to horizontal
  labs(x = NULL, y = NULL)

# Switch to polar coordinates, then evaluate `bar` on its own line: an
# assignment alone does not print, so the plot was never drawn.
bar <- bar + coord_polar()
bar

3.9.1

1

# Stacked bars in polar coordinates, default theta.
ggplot(data = mpg, mapping = aes(x = drv, fill = class)) +
  geom_bar() +
  coord_polar()

# Same bars with the angle taken from y.
ggplot(data = mpg, mapping = aes(x = drv, fill = class)) +
  geom_bar() +
  coord_polar(theta = "y")

# answer?
# Single stacked bar; the y axis shows counts.
ggplot(mpg, aes(x = factor(1), fill = class)) +
  geom_bar()

# Same bar with position = "fill"; the y axis shows proportions.
# The argument was previously misspelled `positon`, which ggplot2 ignored with
# a warning, so this plot came out identical to the count version.
ggplot(mpg, aes(x = factor(1), fill = class)) +
  geom_bar(position = "fill")

# Proportional bars in polar coordinates, default theta.
ggplot(data = mpg, mapping = aes(x = drv, fill = class)) +
  geom_bar(position = "fill") +
  coord_polar()

# Same bars with the angle taken from y.
ggplot(data = mpg, mapping = aes(x = drv, fill = class)) +
  geom_bar(position = "fill") +
  coord_polar(theta = "y")

# theta = "y" maps the angle to the y variable
# theta = "x" (the default) maps the angle to the x variable

3

# if you add coord_quickmap(), the shape slightly changes
ggplot(data = nz, mapping = aes(x = long, y = lat, group = group)) +
  geom_polygon() +
  coord_quickmap()

# if you add coord_map(), the shape slightly changes
ggplot(data = nz, mapping = aes(x = long, y = lat, group = group)) +
  geom_polygon() +
  coord_map()

4

# geom_abline() draws the line y = x.
# coord_fixed() ensures that the abline is at a 45 degree angle.
ggplot(mpg, aes(x = cty, y = hwy)) +
  geom_point() +
  geom_abline() +
  coord_fixed()